Descriptive
# Count of observations
# Create data set for analysis
sem <- doubleclick_clean[,c('Campaign','Keyword','Keyword Group','Publisher Name', 'Bid Strategy','Engine Click Thru %','Match Type','Trans. Conv. %','Total Cost/ Trans.','Impressions','Total Volume of Bookings')]
# Get a big picture understanding of the data
summary(sem)
## Campaign Keyword Keyword Group Publisher Name
## Length:4510 Length:4510 Length:4510 Length:4510
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
## Bid Strategy Engine Click Thru % Match Type Trans. Conv. %
## Length:4510 Min. : 0.000 Length:4510 Min. : 0.0000
## Class :character 1st Qu.: 1.532 Class :character 1st Qu.: 0.0000
## Mode :character Median : 4.106 Mode :character Median : 0.0000
## Mean : 11.141 Mean : 0.5693
## 3rd Qu.: 10.917 3rd Qu.: 0.0000
## Max. :200.000 Max. :900.0000
## Total Cost/ Trans. Impressions Total Volume of Bookings
## Min. : 0.00 Min. : 0 Min. : 0.0000
## 1st Qu.: 0.00 1st Qu.: 28 1st Qu.: 0.0000
## Median : 0.00 Median : 176 Median : 0.0000
## Mean : 27.61 Mean : 9284 Mean : 0.8734
## 3rd Qu.: 0.00 3rd Qu.: 844 3rd Qu.: 0.0000
## Max. :9597.17 Max. :8342415 Max. :439.0000
str(sem)
## 'data.frame': 4510 obs. of 11 variables:
## $ Campaign : chr "Western Europe Destinations" "Geo Targeted DC" "Air France Brand & French Destinations" "Air France Global Campaign" ...
## $ Keyword : chr "fly to florence" "low international airfare" "air discount france ticket" "[airfrance]" ...
## $ Keyword Group : chr "Florence" "Low International DC" "France" "Air France" ...
## $ Publisher Name : chr "Yahoo - US" "Yahoo - US" "MSN - Global" "Google - Global" ...
## $ Bid Strategy : chr "Unassigned" "Unassigned" "Position 2-5 Bid Strategy" "Position 1- 3" ...
## $ Engine Click Thru % : num 9.09 16.67 11.11 14.71 2.52 ...
## $ Match Type : chr "Advanced" "Advanced" "Broad" "Exact" ...
## $ Trans. Conv. % : num 900 100 100 3.39 12.5 ...
## $ Total Cost/ Trans. : num 0.257 0.625 0.388 1.156 2.2 ...
## $ Impressions : num 11 6 9 401 318 722 13 547 448 129 ...
## $ Total Volume of Bookings: num 9 1 1 2 1 2 1 2 1 1 ...
# Find out most frequently used bid strategy
table(sem$`Bid Strategy`)
##
## Pos 3-6 Position 1- 3
## 45 264
## Position 1-2 Target Position 1-4 Bid Strategy
## 285 151
## Position 2-5 Bid Strategy Position 5-10 Bid Strategy
## 333 2208
## Unassigned
## 1224
# Find out unique publishers
unique(sem$`Publisher Name`)
## [1] "Yahoo - US" "MSN - Global" "Google - Global"
## [4] "Overture - Global" "Google - US" "Overture - US"
## [7] "MSN - US"
# Average out the clickthroughs per publisher
clickthrough_publisher <- aggregate(sem$`Engine Click Thru %`, by=list(sem$`Publisher Name`), FUN=mean)
# Visualize average clickthroughs per publisher
plot_ly(clickthrough_publisher, x = clickthrough_publisher$`Group.1`, y=~`x`,title = 'Average Clickthrough rate of different publisher')%>%
layout(title = 'Clickthrough per Publisher', plot_bgcolor = "#e5ecf6",xaxis = list(title = 'Publisher'),yaxis = list(title = 'Clickthrough Rate in %'))
## No trace type specified:
## Based on info supplied, a 'bar' trace seems appropriate.
## Read more about this trace type -> https://plotly.com/r/reference/#bar
## Warning: 'bar' objects don't have these attributes: 'title'
## Valid attributes include:
## '_deprecated', 'alignmentgroup', 'base', 'basesrc', 'cliponaxis', 'constraintext', 'customdata', 'customdatasrc', 'dx', 'dy', 'error_x', 'error_y', 'hoverinfo', 'hoverinfosrc', 'hoverlabel', 'hovertemplate', 'hovertemplatesrc', 'hovertext', 'hovertextsrc', 'ids', 'idssrc', 'insidetextanchor', 'insidetextfont', 'legendgroup', 'legendgrouptitle', 'legendrank', 'marker', 'meta', 'metasrc', 'name', 'offset', 'offsetgroup', 'offsetsrc', 'opacity', 'orientation', 'outsidetextfont', 'selected', 'selectedpoints', 'showlegend', 'stream', 'text', 'textangle', 'textfont', 'textposition', 'textpositionsrc', 'textsrc', 'texttemplate', 'texttemplatesrc', 'transforms', 'type', 'uid', 'uirevision', 'unselected', 'visible', 'width', 'widthsrc', 'x', 'x0', 'xaxis', 'xcalendar', 'xhoverformat', 'xperiod', 'xperiod0', 'xperiodalignment', 'xsrc', 'y', 'y0', 'yaxis', 'ycalendar', 'yhoverformat', 'yperiod', 'yperiod0', 'yperiodalignment', 'ysrc', 'key', 'set', 'frame', 'transforms', '_isNestedKey', '_isSimpleKey', '_isGraticule', '_bbox'
# Sum up Transactions per publisher
transactions_publisher <- aggregate(sem$`Total Volume of Bookings`, by=list(sem$`Publisher Name`), FUN=sum)
# Visualize transactions per publisher
plot_ly(transactions_publisher, x = transactions_publisher$`Group.1`, y=~`x`,title = 'Transactions per publisher')%>%
layout(title = 'Transactions per publisher', plot_bgcolor = "#e5ecf6",xaxis = list(title = 'Publisher'),yaxis = list(title = 'Transactions'))
## No trace type specified:
## Based on info supplied, a 'bar' trace seems appropriate.
## Read more about this trace type -> https://plotly.com/r/reference/#bar
## Warning: 'bar' objects don't have these attributes: 'title'
## Valid attributes include:
## '_deprecated', 'alignmentgroup', 'base', 'basesrc', 'cliponaxis', 'constraintext', 'customdata', 'customdatasrc', 'dx', 'dy', 'error_x', 'error_y', 'hoverinfo', 'hoverinfosrc', 'hoverlabel', 'hovertemplate', 'hovertemplatesrc', 'hovertext', 'hovertextsrc', 'ids', 'idssrc', 'insidetextanchor', 'insidetextfont', 'legendgroup', 'legendgrouptitle', 'legendrank', 'marker', 'meta', 'metasrc', 'name', 'offset', 'offsetgroup', 'offsetsrc', 'opacity', 'orientation', 'outsidetextfont', 'selected', 'selectedpoints', 'showlegend', 'stream', 'text', 'textangle', 'textfont', 'textposition', 'textpositionsrc', 'textsrc', 'texttemplate', 'texttemplatesrc', 'transforms', 'type', 'uid', 'uirevision', 'unselected', 'visible', 'width', 'widthsrc', 'x', 'x0', 'xaxis', 'xcalendar', 'xhoverformat', 'xperiod', 'xperiod0', 'xperiodalignment', 'xsrc', 'y', 'y0', 'yaxis', 'ycalendar', 'yhoverformat', 'yperiod', 'yperiod0', 'yperiodalignment', 'ysrc', 'key', 'set', 'frame', 'transforms', '_isNestedKey', '_isSimpleKey', '_isGraticule', '_bbox'
# What are the overall average costs / transaction
avg_costs_transaction <- print(mean(sem$`Total Cost/ Trans.`))
## [1] 27.60745
# Average out the costs per transaction per publisher
costs_publisher <- aggregate(sem$`Total Cost/ Trans.`, by=list(sem$`Publisher Name`), FUN=mean)
# Visualize average costs per transaction per engine
plot_ly(costs_publisher, x = costs_publisher$`Group.1`, y=~`x`)%>%
layout(title = 'Average Costs per Publisher', plot_bgcolor = "#e5ecf6",xaxis = list(title = 'Publisher'),yaxis = list(title = 'Costs / Transaction'))
## No trace type specified:
## Based on info supplied, a 'bar' trace seems appropriate.
## Read more about this trace type -> https://plotly.com/r/reference/#bar
It seems like Google-US has the highest clickthrough rate and the costs / click are unusually high for Yahoo - US. One reason could be the advanced Match Type that gets Air France uses on that engine.
# Total Cost per Transaction - Distribution per Publisher
plot_ly(sem,y = ~`Total Cost/ Trans.`, color = ~`Publisher Name`, type = "box")
# Visualize distribution of Bid Strategies for single Publishers
plot_ly(sem[which(sem$`Publisher Name`=='Google - US'),], x = ~`Publisher Name`, y = ~`Total Cost/ Trans.`, color = ~`Bid Strategy`, type = "box")
# Visualize impressions per campaign
plot_ly(doubleclick_clean, x = doubleclick_clean$`Campaign`, y=~Impressions, type='bar')
library('GGally')
## Registered S3 method overwritten by 'GGally':
## method from
## +.gg ggplot2
# Select all the numerical variables
logic <- sapply(sem, is.numeric)
numerical_var <- sem[,logic]
# Select all the numerical variables
#logic <- sapply(doubleclick_clean, is.numeric)
#numerical_var <- doubleclick_clean[,logic]
numerical_var_standardized <- as.data.frame(scale(numerical_var))
p <- ggpairs(numerical_var_standardized, title="correlogram with ggpairs()")
ggplotly(p)
## Warning: Can only have one: highlight
## Warning: Can only have one: highlight
## Warning: Can only have one: highlight
## Warning: Can only have one: highlight
Most impressions come from unassigned keywords.
# Select observations with the highest total cost per transaction
sem_sub <- subset(sem,subset = `Total Cost/ Trans.` > 0)
# Visualize the costs per transactions for different Publisher
p <- plot_ly(sem_sub, y = ~`Total Cost/ Trans.`, color = I("black"),
alpha = 0.2, boxpoints = "suspectedoutliers")
p1 <- p %>% add_boxplot(x = ~`Publisher Name`)
p1
# Visualize the converted transactions for different bid strategies
convert_bid <- plot_ly(sem_sub, y = ~`Trans. Conv. %`, color = I("black"),
alpha = 0.2, boxpoints = "suspectedoutliers")
p2 <- p %>% add_boxplot(x = ~`Bid Strategy`)
p2
# Visualize the numerical variables in 3D-Space
plot_ly(sem, x = ~`Engine Click Thru %`, y = ~`Trans. Conv. %`, z =~`Total Cost/ Trans.`) %>%
add_markers(color = ~`Trans. Conv. %`)
Keywords
ggplot(data=doubleclick_clean, aes(x=sem$`Trans. Conv.`, y=sem$`Total Cost/ Trans.`, color=sem$`Publisher Name`)) + geom_point() + scale_y_continuous(trans='log10') + scale_x_continuous(trans='log10')
## Warning: Transformation introduced infinite values in continuous y-axis
## Warning: Transformation introduced infinite values in continuous x-axis
